LMS, SIgned-LMSの挙動観察¶
入力信号を固定し、各フィルタ係数において、残差、残差勾配はどうなっているのか? まずはいつもどおりのLMSフィルターからはじめ、Signed-LMSの解析を行ってみる。
In [1]:
import sys
import math
import numpy as np
import matplotlib.pyplot as plt
# 固定した係数によるLMSフィルター処理
def lms_fixcoef(data, coef):
num_samples = data.size
filter_order = coef.size
pred = np.zeros(num_samples)
for smpl in range(filter_order, num_samples, 1):
pred[smpl] = np.dot(coef, data[smpl - filter_order:smpl])
return pred
In [2]:
# xtics, ytics で係数を生成してLMSフィルター処理を実行
# 各点における残差と残差勾配を出力
def lms_experience(data, xtics, ytics):
zerror = np.zeros((xtics.size, ytics.size))
zerrorgrad = np.zeros((xtics.size, ytics.size, 2))
for x in range(xtics.size):
for y in range(ytics.size):
coef = np.array([xtics[x], ytics[y]])
pred = lms_fixcoef(data, coef)
error = data - pred
zerror[x, y] = np.sqrt(np.mean(error ** 2))
zerrorgrad[x, y, 0] = -2 * np.mean(error[1:] * data[0:-1])
zerrorgrad[x, y, 1] = -2 * np.mean(error[2:] * data[0:-2])
return zerror, zerrorgrad
def ploterror(label, xtics, ytics, zmesh):
xmesh, ymesh = np.meshgrid(xtics, ytics)
# plt.pcolormesh(xmesh, ymesh, zmesh, label=label, cmap='Blues')
plt.contour(xmesh, ymesh, zmesh, levels=np.linspace(0.0, 1.5, 10).tolist())
plt.colorbar()
plt.xlabel('x0')
plt.ylabel('x1')
def plotgradient(label, xtics, ytics, zmeshvec):
xmesh, ymesh = np.meshgrid(xtics, ytics)
plt.quiver(xmesh, ymesh, zmeshvec[:,:,0], zmeshvec[:,:,1], label=label)
plt.xlabel('x0')
plt.ylabel('x1')
# 実験用入力データの作成
NUM_SAMPLES = 1024
lineardata = np.ones(NUM_SAMPLES)
sindata = np.ones(NUM_SAMPLES)
for i in range(NUM_SAMPLES):
sindata[i] = math.sin(2 * math.pi * 440 * i / 48000.0)
np.random.seed(0)
gaussnoise = np.random.normal(0, 0.1, NUM_SAMPLES)
np.random.seed(0)
laplacenoise = np.random.laplace(0, 0.1, NUM_SAMPLES)
# x,yの解析範囲
xtics = np.linspace(-2, 3, 20)
ytics = np.linspace(-2, 3, 20)
plt.plot(lineardata)
plt.show()
plt.plot(sindata)
plt.show()
plt.plot(gaussnoise)
plt.show()
plt.plot(laplacenoise)
plt.show()
plt.plot(sindata + gaussnoise)
plt.show()
plt.plot(sindata + laplacenoise)
plt.show()
In [3]:
zerror, zerrorgrad = lms_experience(lineardata, xtics, ytics)
ploterror('Error', xtics, ytics, zerror)
plotgradient('Error Gradient', xtics, ytics, zerrorgrad)
plt.title('Error and Error Gradient')
plt.show()
In [4]:
zerror, zerrorgrad = lms_experience(sindata, xtics, ytics)
ploterror('Error', xtics, ytics, zerror)
plotgradient('Error Gradient', xtics, ytics, zerrorgrad)
plt.title('Error and Error Gradient')
plt.show()
In [5]:
data = lineardata + gaussnoise
zerror, zerrorgrad = lms_experience(data, xtics, ytics)
ploterror('Error', xtics, ytics, zerror)
plotgradient('Error Gradient', xtics, ytics, zerrorgrad)
plt.title('Error and Error Gradient')
plt.show()
In [6]:
data = sindata + gaussnoise
zerror, zerrorgrad = lms_experience(data, xtics, ytics)
ploterror('Error', xtics, ytics, zerror)
plotgradient('Error Gradient', xtics, ytics, zerrorgrad)
plt.title('Error and Error Gradient')
plt.show()
In [7]:
data = lineardata + laplacenoise
zerror, zerrorgrad = lms_experience(data, xtics, ytics)
ploterror('Error', xtics, ytics, zerror)
plotgradient('Error Gradient', xtics, ytics, zerrorgrad)
plt.title('Error and Error Gradient')
plt.show()
In [8]:
data = sindata + laplacenoise
zerror, zerrorgrad = lms_experience(data, xtics, ytics)
ploterror('Error', xtics, ytics, zerror)
plotgradient('Error Gradient', xtics, ytics, zerrorgrad)
plt.title('Error and Error Gradient')
plt.show()
ここまでの考察¶
- どんな波形を突っ込んでも同じ残差分布になる(直線x0+x1=1上に解があるように見える)...
- 音声特有の相関の強みが出ている。ほぼ、x0+x1=1が理想的になる。x1(2個前データの係数)=1とするよりもx0(直前データの係数)=1の方が残差が小さい。これは、直前の信号により相関しているから。x0 = x1 = 0.5はその平均になり、これも残差は小さい。
- 勾配は正しく計算できてそう。
- 雑音を付加すると残差分布が楕円状に変わる
- 雑音が加わると、個々のサンプルの値よりも、サンプルの平均を取ったときに残差が小さくなる。
- つまり、x0 = x1 = 0.5が理想に近い。
- 正規(ガウス)雑音、ラプラス分布雑音でほぼ同じ傾向。しかしラプラスの方がやや裾が広い?
- ふつーのLMSはガウス雑音前提でやってるので注意。
- 勾配の向きが怪しい。グラフ左上、右下が残差勾配を下っている(=悪化している)様に見える。
- →実装不具合のようだ。予測時のフィルタ係数を内積と一致させる(coef[0] data[smpl - 2] + coef[1] data[smpl - 1] とさせ)ないと座標軸と一致しない(反転してしまう。)
- 雑音が加わると、個々のサンプルの値よりも、サンプルの平均を取ったときに残差が小さくなる。
→ 次はSigned-LMSで解析してみる。¶
In [9]:
# xtics, ytics で係数を生成してSigned-LMSフィルター処理を実行
# 各点における残差と残差勾配を出力
def signedlms_experience(data, xtics, ytics):
zerror = np.zeros((xtics.size, ytics.size))
zerrorgrad = np.zeros((xtics.size, ytics.size, 2))
for x in range(xtics.size):
for y in range(ytics.size):
coef = np.array([xtics[x], ytics[y]])
pred = lms_fixcoef(data, coef)
error = data - pred
zerror[x, y] = np.mean(np.abs(error))
zerrorgrad[x, y, 0] = - np.mean(np.sign(error[1:]) * data[0:-1])
zerrorgrad[x, y, 1] = - np.mean(np.sign(error[2:]) * data[0:-2])
return zerror, zerrorgrad
data = lineardata
zerror, zerrorgrad = signedlms_experience(data, xtics, ytics)
ploterror('Error', xtics, ytics, zerror)
plotgradient('Error Gradient', xtics, ytics, zerrorgrad)
plt.title('Error and Error Gradient')
plt.show()
In [10]:
data = sindata
zerror, zerrorgrad = signedlms_experience(data, xtics, ytics)
ploterror('Error', xtics, ytics, zerror)
plotgradient('Error Gradient', xtics, ytics, zerrorgrad)
plt.title('Error and Error Gradient')
plt.show()
In [11]:
data = lineardata + gaussnoise
zerror, zerrorgrad = signedlms_experience(data, xtics, ytics)
ploterror('Error', xtics, ytics, zerror)
plotgradient('Error Gradient', xtics, ytics, zerrorgrad)
plt.title('Error and Error Gradient')
plt.show()
In [12]:
data = sindata + gaussnoise
zerror, zerrorgrad = signedlms_experience(data, xtics, ytics)
ploterror('Error', xtics, ytics, zerror)
plotgradient('Error Gradient', xtics, ytics, zerrorgrad)
plt.title('Error and Error Gradient')
plt.show()
In [13]:
data = lineardata + laplacenoise
zerror, zerrorgrad = signedlms_experience(data, xtics, ytics)
ploterror('Error', xtics, ytics, zerror)
plotgradient('Error Gradient', xtics, ytics, zerrorgrad)
plt.title('Error and Error Gradient')
plt.show()
In [14]:
data = sindata + laplacenoise
zerror, zerrorgrad = signedlms_experience(data, xtics, ytics)
ploterror('Error', xtics, ytics, zerror)
plotgradient('Error Gradient', xtics, ytics, zerrorgrad)
plt.title('Error and Error Gradient')
plt.show()
Signed-LMSの観察¶
- 雑音がない時、どれだけ解から離れていても勾配が同一
- 稜線が直線になった谷になっていると想像。
- 普通のLMSでは解から離れると2乗則?で勾配が大きくなる
- 逆に、どんなに近くても勾配が同一(LMSでは解の近傍で勾配が急激に消える)
- 残差の裾野がLMSに比べて広い。
- 雑音がある時でも、解の近傍で勾配が消失しにくい。
- 解から離れると雑音がない時の勾配に近づく。
- それでも尾根方向は勾配が消えている。
- やっぱ山の稜線に沿った勾配(左上から右下)の向きが気になる。。。これ学習しようとすると思いっきり悪い方向に進むよな。
- 勾配の向きが怪しいのは、勾配法では必ずしも解の稜線に沿った向きに動かないから?→実装バグだった。
- ここ あたりが参考になるかも?勾配は必ずしも極値点を向いてない。
→引き続き、LMSの勾配に観測分散行列(ヘッセ行列)の逆行列を掛けたらどうなるか見ていく¶
In [15]:
# xtics, ytics で係数を生成してLMSフィルター処理を実行
# 各点における残差と残差勾配を出力
def lms_experience_hessian(data, xtics, ytics):
zerror = np.zeros((xtics.size, ytics.size))
zerrorgrad = np.zeros((xtics.size, ytics.size, 2))
zerrorgradhessian = np.zeros((xtics.size, ytics.size, 2))
H = np.zeros((2, 2))
for smpl in range(2, data.size, 1):
xvec = data[smpl - 2:smpl].reshape((2,1))
H += xvec @ xvec.T
H /= data.size
Hinv = np.linalg.inv(H)
for x in range(xtics.size):
for y in range(ytics.size):
coef = np.array([xtics[x], ytics[y]])
pred = lms_fixcoef(data, coef)
error = data - pred
zerror[x, y] = np.sqrt(np.mean(error ** 2))
grad = -2 * np.array([np.mean(error[1:] * data[0:-1]), np.mean(error[2:] * data[0:-2])])
zerrorgrad[x, y, :] = grad
zerrorgradhessian[x, y, :] = (Hinv @ grad.reshape((2,1))).reshape(2)
return zerror, zerrorgrad, zerrorgradhessian, Hinv
xtics = np.linspace(-2, 3, 20)
ytics = np.linspace(-2, 3, 20)
# data = lineardata # 特異になる!
data = sindata # あやしい。なぜか(2,-1)付近に集まっとる
zerror, zerrorgrad, zerrorgradhessian, Hinv = lms_experience_hessian(data, xtics, ytics)
ploterror('Error', xtics, ytics, zerror)
plotgradient('Hessian Gradient', xtics, ytics, zerrorgradhessian)
plt.title('Error and Hessian Error Gradient')
plt.show()
In [16]:
data = lineardata + gaussnoise
zerror, zerrorgrad, zerrorgradhessian, Hinv = lms_experience_hessian(data, xtics, ytics)
ploterror('Error', xtics, ytics, zerror)
plotgradient('Hessian Gradient', xtics, ytics, zerrorgradhessian)
plt.title('Error and Hessian Error Gradient')
plt.show()
In [17]:
data = sindata + gaussnoise
zerror, zerrorgrad, zerrorgradhessian, Hinv = lms_experience_hessian(data, xtics, ytics)
ploterror('Error', xtics, ytics, zerror)
plotgradient('Hessian Gradient', xtics, ytics, zerrorgradhessian)
plt.title('Error and Hessian Error Gradient')
plt.show()
In [18]:
data = lineardata + laplacenoise
zerror, zerrorgrad, zerrorgradhessian, Hinv = lms_experience_hessian(data, xtics, ytics)
ploterror('Error', xtics, ytics, zerror)
plotgradient('Hessian Gradient', xtics, ytics, zerrorgradhessian)
plt.title('Error and Hessian Error Gradient')
plt.show()
In [19]:
data = sindata + laplacenoise
zerror, zerrorgrad, zerrorgradhessian, Hinv = lms_experience_hessian(data, xtics, ytics)
ploterror('Error', xtics, ytics, zerror)
plotgradient('Hessian Gradient', xtics, ytics, zerrorgradhessian)
plt.title('Error and Hessian Error Gradient')
plt.show()
ヘッセ行列を使った時の観察¶
- ノイズがない場合、逆行列が計算できない場合がある。
- 分散行列が特異になる。見たところ分散行列の全ての要素が同一。
- 理論的には確かに半正定値までしか保証されていないから、正則ではないのはありえる。
- 雑音を入れた場合は今の所逆行列が計算できてる。でも、理論上は半正定値なので常に計算できるとは限らず。
- サイン波ではノイズなしで逆行列を計算できたけど勾配が(2, -1)付近を向いてる。
- ノイズがある場合、全ての勾配が極値点(0.5, 0.5)付近を向いている。
- 勾配の尾根方向における勾配消失が見られず、純粋に極値からの距離で勾配が決まっている印象がある。
- これは最適化で有利に働きそう。
→引き続き、Signed-LMSに対して実験してみる¶
- 定義式通りインパルス関数を入れたらどうなってしまうんだろうか…(緊張)
In [20]:
# 平均0分散varのラプラス分布のP(|x| <= delta)を計算
def laplace_distribution_underdelta(var, delta):
return 1 - math.exp(-delta/var)
# ラプラス分布の0とみなす閾値
DELTA = 0.01
# xtics, ytics で係数を生成してLMSフィルター処理を実行
# 各点における残差と残差勾配を出力
def signedlms_experience_hessian(data, xtics, ytics):
zerror = np.zeros((xtics.size, ytics.size))
zerrorgrad = np.zeros((xtics.size, ytics.size, 2))
zerrorgradhessian = np.zeros((xtics.size, ytics.size, 2))
for x in range(xtics.size):
for y in range(ytics.size):
coef = np.array([xtics[x], ytics[y]])
pred = lms_fixcoef(data, coef)
error = data - pred
H = np.zeros((2, 2))
count = 0
for smpl in range(2, data.size, 1):
xvec = data[smpl - 2:smpl].reshape((2, 1))
H += xvec @ xvec.T
var = np.mean(np.abs(error))
H = H * laplace_distribution_underdelta(var, DELTA) / data.size
Hinv = np.linalg.inv(H)
zerror[x, y] = np.sqrt(np.mean(error ** 2))
grad = -np.array([np.mean(np.sign(error[1:]) * data[0:-1]), np.mean(np.sign(error[2:]) * data[0:-2])])
zerrorgrad[x, y, :] = grad
zerrorgradhessian[x, y, :] = (Hinv @ grad.reshape((2,1))).reshape(2)
return zerror, zerrorgrad, zerrorgradhessian
# data = lineardata # 特異になる!
data = sindata
zerror, zerrorgrad, zerrorgradhessian = signedlms_experience_hessian(data, xtics, ytics)
ploterror('Error', xtics, ytics, zerror)
plotgradient('Hessian Gradient', xtics, ytics, zerrorgradhessian)
plt.title('Error and Hessian Error Gradient')
plt.show()
In [21]:
data = lineardata + gaussnoise
zerror, zerrorgrad, zerrorgradhessian = signedlms_experience_hessian(data, xtics, ytics)
ploterror('Error', xtics, ytics, zerror)
plotgradient('Hessian Gradient', xtics, ytics, zerrorgradhessian)
plt.title('Error and Hessian Error Gradient')
plt.show()
In [22]:
data = sindata + gaussnoise
zerror, zerrorgrad, zerrorgradhessian = signedlms_experience_hessian(data, xtics, ytics)
ploterror('Error', xtics, ytics, zerror)
plotgradient('Hessian Gradient', xtics, ytics, zerrorgradhessian)
plt.title('Error and Hessian Error Gradient')
plt.show()
In [23]:
data = lineardata + laplacenoise
zerror, zerrorgrad, zerrorgradhessian = signedlms_experience_hessian(data, xtics, ytics)
ploterror('Error', xtics, ytics, zerror)
plotgradient('Hessian Gradient', xtics, ytics, zerrorgradhessian)
plt.title('Error and Hessian Error Gradient')
plt.show()
In [24]:
data = sindata + laplacenoise
zerror, zerrorgrad, zerrorgradhessian = signedlms_experience_hessian(data, xtics, ytics)
ploterror('Error', xtics, ytics, zerror)
plotgradient('Hessian Gradient', xtics, ytics, zerrorgradhessian)
plt.title('Error and Hessian Error Gradient')
plt.show()
SignedLMS・ヘッセ行列を使った時の観察¶
怪しいけど、残差0のときに和を取るのをやめて、残差0を取る確率で重み付けして分散行列を計算した。
- 尾根の平坦なところ(画面左上、右下)で大きい勾配が出るようになった。
- 反面、左下と右上で勾配が小さくなったように見える(大きさは変わってないけど、相対的に小さくなった?)
- 分散パラメータ0.3は小さすぎる気がする。(残差が0になる確率が0.93と大きすぎる。)離散のスケールにそぐわないような気がする。
- はじめから離散スケールで実験を行うべきかも。信号の振幅は[-32768, 32767]とする。
- いや、[-1,1]のスケールで、残差0確率が10%になるように分散を設定するのが正しいか。
- →連続型確率分布で、残差の絶対値が閾値delta以下になる確率を計算するようにした
→実際に学習をシュミレーションしてみる¶
- 初期値を色々変えたときにどこに行くか。
- ヘッセ行列を使った時に最適値に達する回数は?
In [25]:
# LMSフィルター処理
def lms(data, initcoef, stepsize):
num_samples = data.size
filter_order = initcoef.size
pred = np.zeros(num_samples)
error = np.zeros(num_samples)
coef = initcoef
coefhistory = np.zeros((filter_order, num_samples - filter_order))
for smpl in range(filter_order, num_samples, 1):
# 係数保存
coefhistory[:, smpl-filter_order] = coef
# 予測
pred[smpl] = np.dot(coef, data[smpl - filter_order:smpl])
# 残差計算
error[smpl] = data[smpl] - pred[smpl]
# 係数更新
coef += stepsize * error[smpl] * data[smpl - filter_order:smpl]
return pred, coefhistory
def lms_coefhistory_experience(data, initcoef_list, stepsize):
filter_order = initcoef_list[0].size
listsize = len(initcoef_list)
coefhistory = np.zeros((listsize, filter_order, data.size - filter_order))
predhistory = np.zeros((listsize, data.size))
for i in range(listsize):
predhistory[i,:], coefhistory[i,:,:] = lms(data, initcoef_list[i].copy(), stepsize)
return predhistory, coefhistory
# 係数の軌跡をプロット
def plotcoefhistory(coefhistory):
# plt.plot(coefhistory[0,:], coefhistory[1,:],marker='o')
plt.plot(coefhistory[0,:], coefhistory[1,:])
STEPSIZE = 0.05
initcoef_list = [ np.array([-1.5, -1.5]),
np.array([0.5, -1.5]),
np.array([2.5, -1.5]),
np.array([2.5, 0.5]),
np.array([2.5, 2.5]),
np.array([0.5, 2.5]),
np.array([-1.5, 2.5]),
np.array([-1.5, 0.5]), ]
data = lineardata
predhistory, coefhistory = lms_coefhistory_experience(data, initcoef_list, STEPSIZE)
zerror, zerrorgrad = lms_experience(data, xtics, ytics)
for i in range(len(initcoef_list)):
plotcoefhistory(coefhistory[i])
ploterror('Error', xtics, ytics, zerror)
plotgradient('Gradient', xtics, ytics, zerrorgrad)
plt.show()
In [26]:
data = sindata
_, coefhistory = lms_coefhistory_experience(data, initcoef_list, STEPSIZE)
zerror, zerrorgrad = lms_experience(data, xtics, ytics)
for i in range(len(initcoef_list)):
plotcoefhistory(coefhistory[i])
ploterror('Error', xtics, ytics, zerror)
plotgradient('Gradient', xtics, ytics, zerrorgrad)
plt.show()
In [27]:
data = lineardata + gaussnoise
_, coefhistory = lms_coefhistory_experience(data, initcoef_list, STEPSIZE)
zerror, zerrorgrad = lms_experience(data, xtics, ytics)
for i in range(len(initcoef_list)):
plotcoefhistory(coefhistory[i])
ploterror('Error', xtics, ytics, zerror)
plotgradient('Gradient', xtics, ytics, zerrorgrad)
plt.show()
In [28]:
data = sindata + gaussnoise
_, coefhistory = lms_coefhistory_experience(data, initcoef_list, STEPSIZE)
zerror, zerrorgrad = lms_experience(data, xtics, ytics)
for i in range(len(initcoef_list)):
plotcoefhistory(coefhistory[i])
ploterror('Error', xtics, ytics, zerror)
plotgradient('Gradient', xtics, ytics, zerrorgrad)
plt.show()
In [29]:
data = lineardata + laplacenoise
_, coefhistory = lms_coefhistory_experience(data, initcoef_list, STEPSIZE)
zerror, zerrorgrad = lms_experience(data, xtics, ytics)
for i in range(len(initcoef_list)):
plotcoefhistory(coefhistory[i])
ploterror('Error', xtics, ytics, zerror)
plotgradient('Gradient', xtics, ytics, zerrorgrad)
plt.show()
In [30]:
data = sindata + laplacenoise
_, coefhistory = lms_coefhistory_experience(data, initcoef_list, STEPSIZE)
zerror, zerrorgrad = lms_experience(data, xtics, ytics)
for i in range(len(initcoef_list)):
plotcoefhistory(coefhistory[i])
ploterror('Error', xtics, ytics, zerror)
plotgradient('Gradient', xtics, ytics, zerrorgrad)
plt.show()
In [31]:
# Sined-LMSフィルター処理
def signedlms(data, initcoef, stepsize):
num_samples = data.size
filter_order = initcoef.size
pred = np.zeros(num_samples)
error = np.zeros(num_samples)
coef = initcoef
coefhistory = np.zeros((filter_order, num_samples - filter_order))
for smpl in range(filter_order, num_samples, 1):
# 係数保存
coefhistory[:, smpl-filter_order] = coef
# 予測
pred[smpl] = np.dot(coef, data[smpl - filter_order:smpl])
# 残差計算
error[smpl] = data[smpl] - pred[smpl]
# 係数更新
coef += stepsize * np.sign(error[smpl]) * data[smpl - filter_order:smpl]
return pred, coefhistory
def signedlms_coefhistory_experience(data, initcoef_list, stepsize):
filter_order = initcoef_list[0].size
listsize = len(initcoef_list)
coefhistory = np.zeros((listsize, filter_order, data.size - filter_order))
predhistory = np.zeros((listsize, data.size))
for i in range(listsize):
predhistory[i,:], coefhistory[i,:,:] = signedlms(data, initcoef_list[i].copy(), stepsize)
return predhistory, coefhistory
SIGNEDLMS_STEPSIZE = 0.1
data = lineardata
_, coefhistory = signedlms_coefhistory_experience(data, initcoef_list, SIGNEDLMS_STEPSIZE)
zerror, zerrorgrad = signedlms_experience(data, xtics, ytics)
for i in range(len(initcoef_list)):
plotcoefhistory(coefhistory[i])
ploterror('Error', xtics, ytics, zerror)
plotgradient('Gradient', xtics, ytics, zerrorgrad)
plt.show()
In [32]:
data = sindata
_, coefhistory = signedlms_coefhistory_experience(data, initcoef_list, SIGNEDLMS_STEPSIZE)
zerror, zerrorgrad = signedlms_experience(data, xtics, ytics)
for i in range(len(initcoef_list)):
plotcoefhistory(coefhistory[i])
ploterror('Error', xtics, ytics, zerror)
plotgradient('Gradient', xtics, ytics, zerrorgrad)
plt.show()
In [33]:
data = lineardata + gaussnoise
_, coefhistory = signedlms_coefhistory_experience(data, initcoef_list, SIGNEDLMS_STEPSIZE)
zerror, zerrorgrad = signedlms_experience(data, xtics, ytics)
for i in range(len(initcoef_list)):
plotcoefhistory(coefhistory[i])
ploterror('Error', xtics, ytics, zerror)
plotgradient('Gradient', xtics, ytics, zerrorgrad)
plt.show()
In [34]:
data = sindata + gaussnoise
_, coefhistory = signedlms_coefhistory_experience(data, initcoef_list, SIGNEDLMS_STEPSIZE)
zerror, zerrorgrad = signedlms_experience(data, xtics, ytics)
for i in range(len(initcoef_list)):
plotcoefhistory(coefhistory[i])
ploterror('Error', xtics, ytics, zerror)
plotgradient('Gradient', xtics, ytics, zerrorgrad)
plt.show()
In [35]:
data = lineardata + laplacenoise
_, coefhistory = signedlms_coefhistory_experience(data, initcoef_list, SIGNEDLMS_STEPSIZE)
zerror, zerrorgrad = signedlms_experience(data, xtics, ytics)
for i in range(len(initcoef_list)):
plotcoefhistory(coefhistory[i])
ploterror('Error', xtics, ytics, zerror)
plotgradient('Gradient', xtics, ytics, zerrorgrad)
plt.show()
In [36]:
data = sindata + laplacenoise
_, coefhistory = signedlms_coefhistory_experience(data, initcoef_list, SIGNEDLMS_STEPSIZE)
zerror, zerrorgrad = signedlms_experience(data, xtics, ytics)
for i in range(len(initcoef_list)):
plotcoefhistory(coefhistory[i])
ploterror('Error', xtics, ytics, zerror)
plotgradient('Gradient', xtics, ytics, zerrorgrad)
plt.show()
In [37]:
# Hessian-LMSフィルター処理
def hessianlms(data, initcoef, stepsize):
num_samples = data.size
filter_order = initcoef.size
pred = np.zeros(num_samples)
error = np.zeros(num_samples)
coef = initcoef
coefhistory = np.zeros((filter_order, num_samples - filter_order))
S = np.eye(2)
for smpl in range(filter_order, num_samples, 1):
# 係数保存
coefhistory[:, smpl-filter_order] = coef
# 予測
pred[smpl] = np.dot(coef, data[smpl - filter_order:smpl])
# 残差計算
error[smpl] = data[smpl] - pred[smpl]
# 観測データのベクトル化
vdata = data[smpl - filter_order:smpl].reshape((filter_order, 1))
# 勾配
grad = error[smpl] * vdata
# 分散(情報)行列
S += vdata @ vdata.T
# ヘッセ行列の逆行列を更新
Hinv = np.linalg.inv(S / (smpl - filter_order + 1))
# 係数更新
coef += stepsize * (Hinv @ grad).flatten()
return pred, coefhistory
def hessianlms_coefhistory_experience(data, initcoef_list, stepsize):
filter_order = initcoef_list[0].size
listsize = len(initcoef_list)
coefhistory = np.zeros((listsize, filter_order, data.size - filter_order))
predhistory = np.zeros((listsize, data.size))
for i in range(listsize):
predhistory[i,:], coefhistory[i,:,:] = hessianlms(data, initcoef_list[i].copy(), stepsize)
return predhistory, coefhistory
data = sindata
hessian_predhistory, coefhistory = hessianlms_coefhistory_experience(data, initcoef_list, STEPSIZE)
zerror, zerrorgrad, zerrorgradhessian, _ = lms_experience_hessian(data, xtics, ytics)
predhistory, _ = lms_coefhistory_experience(data, initcoef_list, STEPSIZE)
for i in range(len(initcoef_list)):
plotcoefhistory(coefhistory[i])
ploterror('Error', xtics, ytics, zerror)
plotgradient('Gradient', xtics, ytics, zerrorgradhessian)
plt.show()
for i in range(len(initcoef_list)):
error = predhistory[i,:] - data
plt.plot(np.abs(error[:400]))
plt.show()
for i in range(len(initcoef_list)):
error = hessian_predhistory[i,:] - data
plt.plot(np.abs(error[:400]))
plt.show()
plt.hist((predhistory - data).flatten(), bins=500, histtype='step')
plt.hist((hessian_predhistory - data).flatten(), bins=500, histtype='step')
plt.show()
In [38]:
data = lineardata + gaussnoise
hessian_predhistory, coefhistory = hessianlms_coefhistory_experience(data, initcoef_list, STEPSIZE)
zerror, zerrorgrad, zerrorgradhessian, _ = lms_experience_hessian(data, xtics, ytics)
predhistory, _ = lms_coefhistory_experience(data, initcoef_list, STEPSIZE)
for i in range(len(initcoef_list)):
plotcoefhistory(coefhistory[i])
ploterror('Error', xtics, ytics, zerror)
plotgradient('Gradient', xtics, ytics, zerrorgradhessian)
plt.show()
for i in range(len(initcoef_list)):
error = predhistory[i,:] - data
plt.plot(np.abs(error[:400]))
plt.show()
for i in range(len(initcoef_list)):
error = hessian_predhistory[i,:] - data
plt.plot(np.abs(error[:400]))
plt.show()
plt.hist((predhistory - data).flatten(), bins=500, histtype='step')
plt.hist((hessian_predhistory - data).flatten(), bins=500, histtype='step')
plt.show()
In [39]:
data = sindata + gaussnoise
hessian_predhistory, coefhistory = hessianlms_coefhistory_experience(data, initcoef_list, STEPSIZE)
zerror, zerrorgrad, zerrorgradhessian, _ = lms_experience_hessian(data, xtics, ytics)
predhistory, _ = lms_coefhistory_experience(data, initcoef_list, STEPSIZE)
for i in range(len(initcoef_list)):
plotcoefhistory(coefhistory[i])
ploterror('Error', xtics, ytics, zerror)
plotgradient('Gradient', xtics, ytics, zerrorgradhessian)
plt.show()
for i in range(len(initcoef_list)):
error = predhistory[i,:] - data
plt.plot(np.abs(error[:400]))
plt.show()
for i in range(len(initcoef_list)):
error = hessian_predhistory[i,:] - data
plt.plot(np.abs(error[:400]))
plt.show()
plt.hist((predhistory - data).flatten(), bins=500, histtype='step')
plt.hist((hessian_predhistory - data).flatten(), bins=500, histtype='step')
plt.show()
In [40]:
data = lineardata + laplacenoise
hessian_predhistory, coefhistory = hessianlms_coefhistory_experience(data, initcoef_list, STEPSIZE)
zerror, zerrorgrad, zerrorgradhessian, _ = lms_experience_hessian(data, xtics, ytics)
predhistory, _ = lms_coefhistory_experience(data, initcoef_list, STEPSIZE)
for i in range(len(initcoef_list)):
plotcoefhistory(coefhistory[i])
ploterror('Error', xtics, ytics, zerror)
plotgradient('Gradient', xtics, ytics, zerrorgradhessian)
plt.show()
for i in range(len(initcoef_list)):
error = predhistory[i,:] - data
plt.plot(np.abs(error[:400]))
plt.show()
for i in range(len(initcoef_list)):
error = hessian_predhistory[i,:] - data
plt.plot(np.abs(error[:400]))
plt.show()
plt.hist((predhistory - data).flatten(), bins=500, histtype='step')
plt.hist((hessian_predhistory - data).flatten(), bins=500, histtype='step')
plt.show()
In [41]:
data = sindata + laplacenoise
hessian_predhistory, coefhistory = hessianlms_coefhistory_experience(data, initcoef_list, STEPSIZE)
zerror, zerrorgrad, zerrorgradhessian, _ = lms_experience_hessian(data, xtics, ytics)
predhistory, _ = lms_coefhistory_experience(data, initcoef_list, STEPSIZE)
for i in range(len(initcoef_list)):
plotcoefhistory(coefhistory[i])
ploterror('Error', xtics, ytics, zerror)
plotgradient('Gradient', xtics, ytics, zerrorgradhessian)
plt.show()
for i in range(len(initcoef_list)):
error = predhistory[i,:] - data
plt.plot(np.abs(error[:]))
plt.show()
for i in range(len(initcoef_list)):
error = hessian_predhistory[i,:] - data
plt.plot(np.abs(error[:]))
plt.show()
plt.hist((predhistory - data).flatten(), bins=500, histtype='step')
plt.hist((hessian_predhistory - data).flatten(), bins=500, histtype='step')
plt.show()
ヘッセ行列込みのLMSで気付いたこと¶
- 解の軌跡が収束している。
- 雑音で揺れているものの、尾根に引っかからず、極小点(0.5, 0.5)にまっすぐ向かっているように見える。
- こころなしか収束が早い(まだ要観察。数回の試行の平均軌跡を見れば良い?)
定式化の再確認¶
入力データに雑音が乗ってる。 本来の定式化では、雑音は入力と独立に乗るはず。
<今、上で実験しているもの>
雑音-------
↓
入力 --> + --> <システム> --> 観測出力
<雑音が後で入るもの>
雑音-------------------------
↓
入力 --> <システム> --> + --> 観測出力
意図したものにならないと、システムは雑音に対しても係数を畳み込んでしまうから雑音と相関を持つのでは? →下で実験までしたけど、雑音が後に入るのは間違っていそう。観測できる入力には雑音が乗っている。観測とフィルタを畳みこむ必要がある。
In [42]:
# LMSフィルター処理(リファレンス出力に対する等化)
def lmsref(reference, data, initcoef, stepsize):
num_samples = data.size
filter_order = initcoef.size
pred = np.zeros(num_samples)
error = np.zeros(num_samples)
coef = initcoef
coefhistory = np.zeros((filter_order, num_samples - filter_order))
for smpl in range(filter_order, num_samples, 1):
# 係数保存
coefhistory[:, smpl-filter_order] = coef
# 予測
pred[smpl] = np.dot(coef, data[smpl - filter_order:smpl])
# 残差計算
error[smpl] = reference[smpl] - pred[smpl]
# 係数更新
coef += stepsize * error[smpl] * data[smpl - filter_order:smpl]
return pred, coefhistory
def lmsref_coefhistory_experience(reference, data, initcoef_list, stepsize):
filter_order = initcoef_list[0].size
listsize = len(initcoef_list)
coefhistory = np.zeros((listsize, filter_order, data.size - filter_order))
predhistory = np.zeros((listsize, data.size))
for i in range(listsize):
predhistory[i,:], coefhistory[i,:,:] = lmsref(reference, data, initcoef_list[i].copy(), stepsize)
return predhistory, coefhistory
data = lineardata
reference = data
predhistory, coefhistory = lmsref_coefhistory_experience(reference, data, initcoef_list, STEPSIZE)
zerror, zerrorgrad = lms_experience(data, xtics, ytics)
for i in range(len(initcoef_list)):
plotcoefhistory(coefhistory[i])
ploterror('Error', xtics, ytics, zerror)
plotgradient('Gradient', xtics, ytics, zerrorgrad)
plt.show()
In [43]:
# Hessian-SignedLMSフィルター処理
def hessiansignedlms(data, initcoef, stepsize):
num_samples = data.size
filter_order = initcoef.size
pred = np.zeros(num_samples)
error = np.zeros(num_samples)
coef = initcoef
coefhistory = np.zeros((filter_order, num_samples - filter_order))
S = np.eye(2)
Hinv = np.eye(2)
for smpl in range(filter_order, num_samples, 1):
# 係数保存
coefhistory[:, smpl-filter_order] = coef
# 予測
pred[smpl] = np.dot(coef, data[smpl - filter_order:smpl])
# 残差計算
error[smpl] = data[smpl] - pred[smpl]
# 観測データのベクトル化
vdata = data[smpl - filter_order:smpl].reshape((filter_order, 1))
# 勾配
grad = np.sign(error[smpl]) * vdata
# 確率で重み付けした情報行列の計算
errvar = np.abs(error[smpl])
# ヘッセ行列の逆行列を更新
hgrad = Hinv @ grad
tau = np.max([1 / (smpl - filter_order + 1), 0.01])
Hinv += tau * (Hinv - (hgrad @ hgrad.T) * laplace_distribution_underdelta(errvar, DELTA))
coef += stepsize * hgrad.flatten()
# S += (vdata @ vdata.T) * laplace_distribution_underdelta(errvar, DELTA)
# Hinv = np.linalg.inv(S / (smpl - filter_order + 1))
# 係数更新
# coef += stepsize * (Hinv @ grad).flatten()
return pred, coefhistory
def hessiansignedlms_coefhistory_experience(data, initcoef_list, stepsize):
filter_order = initcoef_list[0].size
listsize = len(initcoef_list)
coefhistory = np.zeros((listsize, filter_order, data.size - filter_order))
predhistory = np.zeros((listsize, data.size))
for i in range(listsize):
predhistory[i,:], coefhistory[i,:,:] = hessiansignedlms(data, initcoef_list[i].copy(), stepsize)
return predhistory, coefhistory
HESSIANSIGNEDLMS_STEPSIZE = SIGNEDLMS_STEPSIZE / 50
data = sindata
hessian_predhistory, coefhistory = hessiansignedlms_coefhistory_experience(data, initcoef_list, HESSIANSIGNEDLMS_STEPSIZE)
zerror, zerrorgrad, zerrorgradhessian = signedlms_experience_hessian(data, xtics, ytics)
predhistory, _ = signedlms_coefhistory_experience(data, initcoef_list, SIGNEDLMS_STEPSIZE)
for i in range(len(initcoef_list)):
plotcoefhistory(coefhistory[i])
ploterror('Error', xtics, ytics, zerror)
plotgradient('Gradient', xtics, ytics, zerrorgradhessian)
plt.show()
for i in range(len(initcoef_list)):
error = predhistory[i,:] - data
plt.plot(np.abs(error[:400]))
plt.show()
for i in range(len(initcoef_list)):
error = hessian_predhistory[i,:] - data
plt.plot(np.abs(error[:400]))
plt.show()
plt.hist((predhistory - data).flatten(), bins=500, histtype='step')
plt.hist((hessian_predhistory - data).flatten(), bins=500, histtype='step')
plt.show()
ステップサイズを小さく取ったら発散しなくなった。¶
- NLMSみたくステップサイズの収束が保証されている範囲を知りたい。
In [44]:
data = lineardata + gaussnoise
hessian_predhistory, coefhistory = hessiansignedlms_coefhistory_experience(data, initcoef_list, HESSIANSIGNEDLMS_STEPSIZE)
zerror, zerrorgrad, zerrorgradhessian = signedlms_experience_hessian(data, xtics, ytics)
predhistory, _ = signedlms_coefhistory_experience(data, initcoef_list, SIGNEDLMS_STEPSIZE)
for i in range(len(initcoef_list)):
plotcoefhistory(coefhistory[i])
ploterror('Error', xtics, ytics, zerror)
plotgradient('Gradient', xtics, ytics, zerrorgradhessian)
plt.show()
for i in range(len(initcoef_list)):
error = predhistory[i,:] - data
plt.plot(np.abs(error[:400]))
plt.show()
for i in range(len(initcoef_list)):
error = hessian_predhistory[i,:] - data
plt.plot(np.abs(error[:400]))
plt.show()
plt.hist((predhistory - data).flatten(), bins=500, histtype='step')
plt.hist((hessian_predhistory - data).flatten(), bins=500, histtype='step')
plt.show()
In [45]:
data = sindata + gaussnoise
hessian_predhistory, coefhistory = hessiansignedlms_coefhistory_experience(data, initcoef_list, HESSIANSIGNEDLMS_STEPSIZE)
zerror, zerrorgrad, zerrorgradhessian = signedlms_experience_hessian(data, xtics, ytics)
predhistory, _ = signedlms_coefhistory_experience(data, initcoef_list, SIGNEDLMS_STEPSIZE)
for i in range(len(initcoef_list)):
plotcoefhistory(coefhistory[i])
ploterror('Error', xtics, ytics, zerror)
plotgradient('Gradient', xtics, ytics, zerrorgradhessian)
plt.show()
for i in range(len(initcoef_list)):
error = predhistory[i,:] - data
plt.plot(np.abs(error[:400]))
plt.show()
for i in range(len(initcoef_list)):
error = hessian_predhistory[i,:] - data
plt.plot(np.abs(error[:400]))
plt.show()
plt.hist((predhistory - data).flatten(), bins=500, histtype='step')
plt.hist((hessian_predhistory - data).flatten(), bins=500, histtype='step')
plt.show()
In [46]:
data = lineardata + laplacenoise
hessian_predhistory, coefhistory = hessiansignedlms_coefhistory_experience(data, initcoef_list, HESSIANSIGNEDLMS_STEPSIZE)
zerror, zerrorgrad, zerrorgradhessian = signedlms_experience_hessian(data, xtics, ytics)
predhistory, _ = signedlms_coefhistory_experience(data, initcoef_list, SIGNEDLMS_STEPSIZE)
for i in range(len(initcoef_list)):
plotcoefhistory(coefhistory[i])
ploterror('Error', xtics, ytics, zerror)
plotgradient('Gradient', xtics, ytics, zerrorgradhessian)
plt.show()
for i in range(len(initcoef_list)):
error = predhistory[i,:] - data
plt.plot(np.abs(error[:400]))
plt.show()
for i in range(len(initcoef_list)):
error = hessian_predhistory[i,:] - data
plt.plot(np.abs(error[:400]))
plt.show()
plt.hist((predhistory - data).flatten(), bins=500, histtype='step')
plt.hist((hessian_predhistory - data).flatten(), bins=500, histtype='step')
plt.show()
In [47]:
data = sindata + laplacenoise
hessian_predhistory, coefhistory = hessiansignedlms_coefhistory_experience(data, initcoef_list, HESSIANSIGNEDLMS_STEPSIZE)
zerror, zerrorgrad, zerrorgradhessian = signedlms_experience_hessian(data, xtics, ytics)
predhistory, _ = signedlms_coefhistory_experience(data, initcoef_list, SIGNEDLMS_STEPSIZE)
for i in range(len(initcoef_list)):
plotcoefhistory(coefhistory[i])
ploterror('Error', xtics, ytics, zerror)
plotgradient('Gradient', xtics, ytics, zerrorgradhessian)
plt.show()
for i in range(len(initcoef_list)):
error = predhistory[i,:] - data
plt.plot(np.abs(error[:]))
plt.show()
for i in range(len(initcoef_list)):
error = hessian_predhistory[i,:] - data
plt.plot(np.abs(error[:]))
plt.show()
plt.hist((predhistory - data).flatten(), bins=500, histtype='step')
plt.hist((hessian_predhistory - data).flatten(), bins=500, histtype='step')
plt.show()
所感¶
- ふらつきが無くなったけど、収束は普通のSigned-LMSよりもおそいように見える。
- おおむねええ感じ。さて、本当にこれが既存でないのか確かめるべきでは?
- 有効性にあたり気になるところ
- LMSのヘッセ行列(確率による重み付けをしない)でいいんじゃないの?→軽く試したら全く収束しない。要精査。
- エントロピーや誤差はLMSより良いの?
情報行列の逆行列を誤っていたので再計算¶
- データベクトルの分散行列を計算していた。これは間違いで、勾配ベクトルの分散行列が正しい。
In [48]:
# xtics, ytics で係数を生成してLMSフィルター処理を実行
# 各点における残差と残差勾配を出力
def lms_experience_hessian_fix(data, xtics, ytics):
zerror = np.zeros((xtics.size, ytics.size))
zerrorgrad = np.zeros((xtics.size, ytics.size, 2))
zerrorgradhessian = np.zeros((xtics.size, ytics.size, 2))
H = np.zeros((2, 2))
for x in range(xtics.size):
for y in range(ytics.size):
coef = np.array([xtics[x], ytics[y]])
pred = lms_fixcoef(data, coef)
error = data - pred
for smpl in range(2, data.size, 1):
grad = error[smpl] * data[smpl - 2:smpl].reshape((2,1))
H += grad @ grad.T
H /= data.size
Hinv = np.linalg.inv(H)
zerror[x, y] = np.sqrt(np.mean(error ** 2))
grad = -2 * np.array([np.mean(error[1:] * data[0:-1]), np.mean(error[2:] * data[0:-2])])
zerrorgrad[x, y, :] = grad
zerrorgradhessian[x, y, :] = (Hinv @ grad.reshape((2,1))).reshape(2)
return zerror, zerrorgrad, zerrorgradhessian
xtics = np.linspace(-2, 3, 20)
ytics = np.linspace(-2, 3, 20)
# data = lineardata # 特異になる!
data = sindata # あやしい。なぜか(2,-1)付近に集まっとる
zerror, zerrorgrad, zerrorgradhessian = lms_experience_hessian_fix(data, xtics, ytics)
ploterror('Error', xtics, ytics, zerror)
plotgradient('Hessian Gradient', xtics, ytics, zerrorgradhessian)
plt.title('Error and Hessian Error Gradient')
plt.show()
In [49]:
data = lineardata + gaussnoise
zerror, zerrorgrad, zerrorgradhessian = lms_experience_hessian_fix(data, xtics, ytics)
ploterror('Error', xtics, ytics, zerror)
plotgradient('Hessian Gradient', xtics, ytics, zerrorgradhessian)
plt.title('Error and Hessian Error Gradient')
plt.show()
In [50]:
data = sindata + gaussnoise
zerror, zerrorgrad, zerrorgradhessian = lms_experience_hessian_fix(data, xtics, ytics)
ploterror('Error', xtics, ytics, zerror)
plotgradient('Hessian Gradient', xtics, ytics, zerrorgradhessian)
plt.title('Error and Hessian Error Gradient')
plt.show()
In [51]:
data = lineardata + laplacenoise
zerror, zerrorgrad, zerrorgradhessian = lms_experience_hessian_fix(data, xtics, ytics)
ploterror('Error', xtics, ytics, zerror)
plotgradient('Hessian Gradient', xtics, ytics, zerrorgradhessian)
plt.title('Error and Hessian Error Gradient')
plt.show()
In [52]:
data = sindata + laplacenoise
zerror, zerrorgrad, zerrorgradhessian = lms_experience_hessian_fix(data, xtics, ytics)
ploterror('Error', xtics, ytics, zerror)
plotgradient('Hessian Gradient', xtics, ytics, zerrorgradhessian)
plt.title('Error and Hessian Error Gradient')
plt.show()
所感¶
- 平らなところで急激な勾配を示すようになった。
- もともと急激な勾配があるところの勾配が消滅。
このままSigned-LMSの検算も行っていくが、おそらく同一の結果になると思われる。 なぜなら、Singned-LMSの勾配はsign(err) dataだからで、(勾配 勾配転置)を計算すると、それはデータの分散行列を求めているのと全くおなじになる。理論はともあれもう一度検算。
In [53]:
# xtics, ytics で係数を生成してSignedLMSフィルター処理を実行
# 各点における残差と残差勾配を出力
def signedlms_experience_hessian_fix(data, xtics, ytics):
zerror = np.zeros((xtics.size, ytics.size))
zerrorgrad = np.zeros((xtics.size, ytics.size, 2))
zerrorgradhessian = np.zeros((xtics.size, ytics.size, 2))
H = np.zeros((2, 2))
for x in range(xtics.size):
for y in range(ytics.size):
coef = np.array([xtics[x], ytics[y]])
pred = lms_fixcoef(data, coef)
error = data - pred
for smpl in range(2, data.size, 1):
grad = np.sign(error[smpl]) * data[smpl - 2:smpl].reshape((2,1))
H += grad @ grad.T
H /= data.size
Hinv = np.linalg.inv(H)
zerror[x, y] = np.sqrt(np.mean(error ** 2))
grad = -2 * np.array([np.mean(error[1:] * data[0:-1]), np.mean(error[2:] * data[0:-2])])
zerrorgrad[x, y, :] = grad
zerrorgradhessian[x, y, :] = (Hinv @ grad.reshape((2,1))).reshape(2)
return zerror, zerrorgrad, zerrorgradhessian
xtics = np.linspace(-2, 3, 20)
ytics = np.linspace(-2, 3, 20)
# data = lineardata # 特異になる!
data = sindata # あやしい。なぜか(2,-1)付近に集まっとる
zerror, zerrorgrad, zerrorgradhessian = signedlms_experience_hessian_fix(data, xtics, ytics)
ploterror('Error', xtics, ytics, zerror)
plotgradient('Hessian Gradient', xtics, ytics, zerrorgradhessian)
plt.title('Error and Hessian Error Gradient')
plt.show()
In [54]:
data = lineardata + gaussnoise
zerror, zerrorgrad, zerrorgradhessian = signedlms_experience_hessian_fix(data, xtics, ytics)
ploterror('Error', xtics, ytics, zerror)
plotgradient('Hessian Gradient', xtics, ytics, zerrorgradhessian)
plt.title('Error and Hessian Error Gradient')
plt.show()
In [55]:
data = sindata + gaussnoise
zerror, zerrorgrad, zerrorgradhessian = signedlms_experience_hessian_fix(data, xtics, ytics)
ploterror('Error', xtics, ytics, zerror)
plotgradient('Hessian Gradient', xtics, ytics, zerrorgradhessian)
plt.title('Error and Hessian Error Gradient')
plt.show()
In [56]:
data = lineardata + laplacenoise
zerror, zerrorgrad, zerrorgradhessian = signedlms_experience_hessian_fix(data, xtics, ytics)
ploterror('Error', xtics, ytics, zerror)
plotgradient('Hessian Gradient', xtics, ytics, zerrorgradhessian)
plt.title('Error and Hessian Error Gradient')
plt.show()
In [57]:
data = sindata + laplacenoise
zerror, zerrorgrad, zerrorgradhessian = signedlms_experience_hessian_fix(data, xtics, ytics)
ploterror('Error', xtics, ytics, zerror)
plotgradient('Hessian Gradient', xtics, ytics, zerrorgradhessian)
plt.title('Error and Hessian Error Gradient')
plt.show()
自然勾配を使ったLMSアルゴリズムに誤りを見つけたので修正して実験し直し。¶
In [58]:
# Hessian-LMSフィルター処理
def hessianlms_fix(data, initcoef, stepsize):
num_samples = data.size
filter_order = initcoef.size
pred = np.zeros(num_samples)
error = np.zeros(num_samples)
coef = initcoef
coefhistory = np.zeros((filter_order, num_samples - filter_order))
S = np.eye(2)
for smpl in range(filter_order, num_samples, 1):
# 係数保存
coefhistory[:, smpl-filter_order] = coef
# 予測
pred[smpl] = np.dot(coef, data[smpl - filter_order:smpl])
# 残差計算
error[smpl] = data[smpl] - pred[smpl]
# 観測データのベクトル化
vdata = data[smpl - filter_order:smpl].reshape((filter_order, 1))
# 勾配
grad = error[smpl] * vdata
# 経験フィッシャー情報行列に加算
S += grad @ grad.T
# 経験フィッシャー情報行列の逆行列を更新
Hinv = np.linalg.inv(S / (smpl - filter_order + 1))
# 係数更新
coef += stepsize * (Hinv @ grad).flatten()
return pred, coefhistory
def hessianlms_coefhistory_experience_fix(data, initcoef_list, stepsize):
filter_order = initcoef_list[0].size
listsize = len(initcoef_list)
coefhistory = np.zeros((listsize, filter_order, data.size - filter_order))
predhistory = np.zeros((listsize, data.size))
for i in range(listsize):
predhistory[i,:], coefhistory[i,:,:] = hessianlms_fix(data, initcoef_list[i].copy(), stepsize)
return predhistory, coefhistory
data = sindata
hessian_predhistory, coefhistory = hessianlms_coefhistory_experience_fix(data, initcoef_list, 0.05)
zerror, zerrorgrad, zerrorgradhessian = lms_experience_hessian_fix(data, xtics, ytics)
predhistory, _ = lms_coefhistory_experience(data, initcoef_list, STEPSIZE)
for i in range(len(initcoef_list)):
plotcoefhistory(coefhistory[i])
ploterror('Error', xtics, ytics, zerror)
plotgradient('Gradient', xtics, ytics, zerrorgradhessian)
plt.show()
for i in range(len(initcoef_list)):
error = predhistory[i,:] - data
plt.plot(np.abs(error[:400]))
plt.show()
for i in range(len(initcoef_list)):
error = hessian_predhistory[i,:] - data
plt.plot(np.abs(error[:400]))
plt.show()
plt.hist((predhistory - data).flatten(), bins=500, histtype='step')
plt.hist((hessian_predhistory - data).flatten(), bins=500, histtype='step')
plt.show()
In [59]:
data = lineardata + gaussnoise
hessian_predhistory, coefhistory = hessianlms_coefhistory_experience_fix(data, initcoef_list, 0.05)
zerror, zerrorgrad, zerrorgradhessian = lms_experience_hessian_fix(data, xtics, ytics)
predhistory, _ = lms_coefhistory_experience(data, initcoef_list, STEPSIZE)
for i in range(len(initcoef_list)):
plotcoefhistory(coefhistory[i])
ploterror('Error', xtics, ytics, zerror)
plotgradient('Gradient', xtics, ytics, zerrorgradhessian)
plt.show()
for i in range(len(initcoef_list)):
error = predhistory[i,:] - data
plt.plot(np.abs(error[:400]))
plt.show()
for i in range(len(initcoef_list)):
error = hessian_predhistory[i,:] - data
plt.plot(np.abs(error[:400]))
plt.show()
plt.hist((predhistory - data).flatten(), bins=500, histtype='step')
plt.hist((hessian_predhistory - data).flatten(), bins=500, histtype='step')
plt.show()
In [60]:
data = sindata + gaussnoise
hessian_predhistory, coefhistory = hessianlms_coefhistory_experience_fix(data, initcoef_list, 0.03)
zerror, zerrorgrad, zerrorgradhessian = lms_experience_hessian_fix(data, xtics, ytics)
predhistory, _ = lms_coefhistory_experience(data, initcoef_list, STEPSIZE)
for i in range(len(initcoef_list)):
plotcoefhistory(coefhistory[i])
ploterror('Error', xtics, ytics, zerror)
plotgradient('Gradient', xtics, ytics, zerrorgradhessian)
plt.show()
for i in range(len(initcoef_list)):
error = predhistory[i,:] - data
plt.plot(np.abs(error[:400]))
plt.show()
for i in range(len(initcoef_list)):
error = hessian_predhistory[i,:] - data
plt.plot(np.abs(error[:400]))
plt.show()
plt.hist((predhistory - data).flatten(), bins=500, histtype='step')
plt.hist((hessian_predhistory - data).flatten(), bins=500, histtype='step')
plt.show()
In [61]:
data = lineardata + laplacenoise
hessian_predhistory, coefhistory = hessianlms_coefhistory_experience_fix(data, initcoef_list, 0.02)
zerror, zerrorgrad, zerrorgradhessian = lms_experience_hessian_fix(data, xtics, ytics)
predhistory, _ = lms_coefhistory_experience(data, initcoef_list, STEPSIZE)
for i in range(len(initcoef_list)):
plotcoefhistory(coefhistory[i])
ploterror('Error', xtics, ytics, zerror)
plotgradient('Gradient', xtics, ytics, zerrorgradhessian)
plt.show()
for i in range(len(initcoef_list)):
error = predhistory[i,:] - data
plt.plot(np.abs(error[:400]))
plt.show()
for i in range(len(initcoef_list)):
error = hessian_predhistory[i,:] - data
plt.plot(np.abs(error[:400]))
plt.show()
plt.hist((predhistory - data).flatten(), bins=500, histtype='step')
plt.hist((hessian_predhistory - data).flatten(), bins=500, histtype='step')
plt.show()
In [62]:
data = sindata + laplacenoise
hessian_predhistory, coefhistory = hessianlms_coefhistory_experience_fix(data, initcoef_list, 0.02)
zerror, zerrorgrad, zerrorgradhessian = lms_experience_hessian_fix(data, xtics, ytics)
predhistory, _ = lms_coefhistory_experience(data, initcoef_list, STEPSIZE)
for i in range(len(initcoef_list)):
plotcoefhistory(coefhistory[i])
ploterror('Error', xtics, ytics, zerror)
plotgradient('Gradient', xtics, ytics, zerrorgradhessian)
plt.show()
for i in range(len(initcoef_list)):
error = predhistory[i,:] - data
plt.plot(np.abs(error[:400]))
plt.show()
for i in range(len(initcoef_list)):
error = hessian_predhistory[i,:] - data
plt.plot(np.abs(error[:400]))
plt.show()
plt.hist((predhistory - data).flatten(), bins=500, histtype='step')
plt.hist((hessian_predhistory - data).flatten(), bins=500, histtype='step')
plt.show()
修正版実装の所感¶
- 勾配が小さい領域(左下と右上)が初期値だと学習が遅い。
- 逆に、裾野が広い領域では学習が早く進む。(早すぎて極値付近で振動しているように見える)
続いて、SignedLMSの自然勾配を試す。
In [63]:
# Hessian-SignedLMSフィルター処理
def hessiansignedlms_fix(data, initcoef, stepsize):
num_samples = data.size
filter_order = initcoef.size
pred = np.zeros(num_samples)
error = np.zeros(num_samples)
coef = initcoef
coefhistory = np.zeros((filter_order, num_samples - filter_order))
S = np.eye(2)
Hinv = np.eye(2)
for smpl in range(filter_order, num_samples, 1):
# 係数保存
coefhistory[:, smpl-filter_order] = coef
# 予測
pred[smpl] = np.dot(coef, data[smpl - filter_order:smpl])
# 残差計算
error[smpl] = data[smpl] - pred[smpl]
# 観測データのベクトル化
vdata = data[smpl - filter_order:smpl].reshape((filter_order, 1))
# 勾配
grad = np.sign(error[smpl]) * vdata
# 確率で重み付けした情報行列の計算
# errvar = np.abs(error[smpl])
# ヘッセ行列の逆行列を更新
# hgrad = Hinv @ grad
# tau = np.max([1 / (smpl - filter_order + 1), 0.01])
# Hinv += tau * (Hinv - (hgrad @ hgrad.T) * laplace_distribution_underdelta(errvar, DELTA))
# coef += stepsize * hgrad.flatten()
S += vdata @ vdata.T
Hinv = np.linalg.inv(S / (smpl - filter_order + 1))
# 係数更新
coef += stepsize * (Hinv @ grad).flatten()
return pred, coefhistory
def hessiansignedlms_coefhistory_experience(data, initcoef_list, stepsize):
filter_order = initcoef_list[0].size
listsize = len(initcoef_list)
coefhistory = np.zeros((listsize, filter_order, data.size - filter_order))
predhistory = np.zeros((listsize, data.size))
for i in range(listsize):
predhistory[i,:], coefhistory[i,:,:] = hessiansignedlms(data, initcoef_list[i].copy(), stepsize)
return predhistory, coefhistory
HESSIANSIGNEDLMS_STEPSIZE = SIGNEDLMS_STEPSIZE / 50
data = sindata
hessian_predhistory, coefhistory = hessiansignedlms_coefhistory_experience(data, initcoef_list, HESSIANSIGNEDLMS_STEPSIZE)
zerror, zerrorgrad, zerrorgradhessian = signedlms_experience_hessian(data, xtics, ytics)
predhistory, _ = signedlms_coefhistory_experience(data, initcoef_list, SIGNEDLMS_STEPSIZE)
for i in range(len(initcoef_list)):
plotcoefhistory(coefhistory[i])
ploterror('Error', xtics, ytics, zerror)
plotgradient('Gradient', xtics, ytics, zerrorgradhessian)
plt.show()
for i in range(len(initcoef_list)):
error = predhistory[i,:] - data
plt.plot(np.abs(error[:400]))
plt.show()
for i in range(len(initcoef_list)):
error = hessian_predhistory[i,:] - data
plt.plot(np.abs(error[:400]))
plt.show()
plt.hist((predhistory - data).flatten(), bins=500, histtype='step')
plt.hist((hessian_predhistory - data).flatten(), bins=500, histtype='step')
plt.show()
In [64]:
data = lineardata + gaussnoise
hessian_predhistory, coefhistory = hessiansignedlms_coefhistory_experience(data, initcoef_list, HESSIANSIGNEDLMS_STEPSIZE)
zerror, zerrorgrad, zerrorgradhessian = signedlms_experience_hessian(data, xtics, ytics)
predhistory, _ = signedlms_coefhistory_experience(data, initcoef_list, SIGNEDLMS_STEPSIZE)
for i in range(len(initcoef_list)):
plotcoefhistory(coefhistory[i])
ploterror('Error', xtics, ytics, zerror)
plotgradient('Gradient', xtics, ytics, zerrorgradhessian)
plt.show()
for i in range(len(initcoef_list)):
error = predhistory[i,:] - data
plt.plot(np.abs(error[:400]))
plt.show()
for i in range(len(initcoef_list)):
error = hessian_predhistory[i,:] - data
plt.plot(np.abs(error[:400]))
plt.show()
plt.hist((predhistory - data).flatten(), bins=500, histtype='step')
plt.hist((hessian_predhistory - data).flatten(), bins=500, histtype='step')
plt.show()
In [65]:
data = sindata + gaussnoise
hessian_predhistory, coefhistory = hessiansignedlms_coefhistory_experience(data, initcoef_list, HESSIANSIGNEDLMS_STEPSIZE)
zerror, zerrorgrad, zerrorgradhessian = signedlms_experience_hessian(data, xtics, ytics)
predhistory, _ = signedlms_coefhistory_experience(data, initcoef_list, SIGNEDLMS_STEPSIZE)
for i in range(len(initcoef_list)):
plotcoefhistory(coefhistory[i])
ploterror('Error', xtics, ytics, zerror)
plotgradient('Gradient', xtics, ytics, zerrorgradhessian)
plt.show()
for i in range(len(initcoef_list)):
error = predhistory[i,:] - data
plt.plot(np.abs(error[:400]))
plt.show()
for i in range(len(initcoef_list)):
error = hessian_predhistory[i,:] - data
plt.plot(np.abs(error[:400]))
plt.show()
plt.hist((predhistory - data).flatten(), bins=500, histtype='step')
plt.hist((hessian_predhistory - data).flatten(), bins=500, histtype='step')
plt.show()
In [66]:
data = lineardata + laplacenoise
hessian_predhistory, coefhistory = hessiansignedlms_coefhistory_experience(data, initcoef_list, HESSIANSIGNEDLMS_STEPSIZE)
zerror, zerrorgrad, zerrorgradhessian = signedlms_experience_hessian(data, xtics, ytics)
predhistory, _ = signedlms_coefhistory_experience(data, initcoef_list, SIGNEDLMS_STEPSIZE)
for i in range(len(initcoef_list)):
plotcoefhistory(coefhistory[i])
ploterror('Error', xtics, ytics, zerror)
plotgradient('Gradient', xtics, ytics, zerrorgradhessian)
plt.show()
for i in range(len(initcoef_list)):
error = predhistory[i,:] - data
plt.plot(np.abs(error[:400]))
plt.show()
for i in range(len(initcoef_list)):
error = hessian_predhistory[i,:] - data
plt.plot(np.abs(error[:400]))
plt.show()
plt.hist((predhistory - data).flatten(), bins=500, histtype='step')
plt.hist((hessian_predhistory - data).flatten(), bins=500, histtype='step')
plt.show()
In [67]:
data = sindata + laplacenoise
hessian_predhistory, coefhistory = hessiansignedlms_coefhistory_experience(data, initcoef_list, HESSIANSIGNEDLMS_STEPSIZE)
zerror, zerrorgrad, zerrorgradhessian = signedlms_experience_hessian(data, xtics, ytics)
predhistory, _ = signedlms_coefhistory_experience(data, initcoef_list, SIGNEDLMS_STEPSIZE)
for i in range(len(initcoef_list)):
plotcoefhistory(coefhistory[i])
ploterror('Error', xtics, ytics, zerror)
plotgradient('Gradient', xtics, ytics, zerrorgradhessian)
plt.show()
for i in range(len(initcoef_list)):
error = predhistory[i,:] - data
plt.plot(np.abs(error[:400]))
plt.show()
for i in range(len(initcoef_list)):
error = hessian_predhistory[i,:] - data
plt.plot(np.abs(error[:400]))
plt.show()
plt.hist((predhistory - data).flatten(), bins=500, histtype='step')
plt.hist((hessian_predhistory - data).flatten(), bins=500, histtype='step')
plt.show()
所感¶
- 残差の分布としてはいい感じ。ナイーブなSignedLMSよりは良い。
- 左下と右上のように、最初の収束が遅い場合がある。
- 学習率を大きめにすると発散するケース多数。
- 正則化(分散行列に係数*単位行列を加算)かけたほうがいいかもしれない。
- しかしそうした場合の更新式がどうなるか…。
情報共有用のデータプロット¶
In [103]:
data = sindata + laplacenoise
zerror, zerrorgrad, zerrorgradhessian = signedlms_experience_hessian_fix(data, xtics, ytics)
plt.plot(data)
plt.title('Input data')
plt.savefig('sin_laplace_input.png')
plt.show()
xmesh, ymesh = np.meshgrid(xtics, ytics)
plt.figure(figsize=(10, 6))
plt.contour(xmesh, ymesh, zerror, levels=np.linspace(0.0, 3.0, 10).tolist(), linestyles='dashed')
plt.colorbar(label='Error')
plt.quiver(xmesh, ymesh, zerrorgrad[:,:,0], zerrorgrad[:,:,1], label='SignedLMS Gradient', color='black')
plt.xlabel('h0')
plt.ylabel('h1')
plt.legend()
plt.title('Error and Gradient')
plt.savefig('error_and_gradient.png')
plt.show()
plt.figure(figsize=(10, 6))
plt.contour(xmesh, ymesh, zerror, levels=np.linspace(0.0, 3.0, 10).tolist(), linestyles='dashed')
plt.colorbar(label='Error')
plt.quiver(xmesh, ymesh, zerrorgrad[:,:,0], zerrorgrad[:,:,1], label='SignedLMS Gradient', color='black')
plt.quiver(xmesh, ymesh, zerrorgradhessian[:,:,0], zerrorgradhessian[:,:,1], label='SignedLMS Natural Gradient', color='red')
plt.xlabel('h0')
plt.ylabel('h1')
plt.legend()
plt.title('Error and Gradient')
plt.savefig('error_and_natural_gradient.png')
plt.show()
In [ ]:
In [ ]: